---
title: "Y1 Study Skills Intervention - Data Cleaning and Analysis"
author: "Fernando Rodriguez"
date: "Last updated on 07-07-18 by FR"
output:
  html_document: default
  pdf_document: default
created on: 3-17-17
editor_options: 
  chunk_output_type: console
---

```{r setup,}
# Set the default chunk option so the R source of each chunk is echoed in the output
knitr::opts_chunk$set(echo = TRUE)
```
# Step 1 - DATA IMPORTING AND CLEANING

```{r}
# Installing relevant packages
## Loading Libraries
library(splitstackshape)
library(RColorBrewer)
library(ggplot2)
library(reshape2)
library(plyr)
library(doBy)
library(dplyr)
library(psych)
library(gridExtra)
library(grid)
library(sjstats) 
library(fields)
library(fifer)
library(skimr) # for quickly summarizing data

```


## Importing Year 1 Data
```{r}
# Load the raw Year 1 file into `ssdata`; the first row supplies the column names
ssdata <- read.csv(file = "Study Skills Y1 Raw Data.csv", header = TRUE)

```



## Demographic Variables - Checking and Cleaning
```{r}
# First-generation status: raw frequencies
table(ssdata$firstgeneration_rec)

# Build firstgeneration_rec2 by copying over only the codes 0 and 1; any other
# code (the original note mentions 9 = other/unknown) is left as NA
ssdata$firstgeneration_rec2 <- NA
for (code in c(0, 1)) {
  ssdata$firstgeneration_rec2[ssdata$firstgeneration_rec == code] <- code
}
# exclude = NULL keeps the NA count in the printed table
table(ssdata$firstgeneration_rec2, exclude = NULL)


# Low-income flag: raw frequencies
table(ssdata$lowincomeflag_rec)

```

## Study Strategy  - Checking and Cleaning
```{r}
## Study-strategy multi-select items: turn blank answers into missing values
# Entries equal to the empty string are marked NA, for the pre- and post-survey

# pre-survey: inspect, blank -> NA, inspect again
table(ssdata$pre_studystrat)
is.na(ssdata$pre_studystrat) <- which(ssdata$pre_studystrat == "")
head(ssdata$pre_studystrat)

# post-survey: same treatment
head(ssdata$post_studystrat)
is.na(ssdata$post_studystrat) <- which(ssdata$post_studystrat == "")
head(ssdata$post_studystrat)
```

## Ethnicity & URM Status - Checking and Cleaning
```{r}
# Ethnicity
# Inspection only: show the storage type and the frequency of each ethnicity code
str(ssdata$ethnicity_rec)
table(ssdata$ethnicity_rec)

# URM indicator: overall frequencies, then cross-tabulated by course section
table(ssdata$urm)
table(ssdata$urm, ssdata$section)

```

## Gender - Checking and Cleaning
```{r, echo = F}
# Gender: inspect the raw coding, then convert it to a labelled factor
str(ssdata$gender_rec)
ssdata$gender_rec <- as.factor(ssdata$gender_rec)
str(ssdata$gender_rec)

# attach labels to the codes 0 / 1 / 9
ssdata$gender_rec <- factor(
  ssdata$gender_rec,
  levels = c(0, 1, 9),
  labels = c("Female", "Male", "Other/Unknown")
)


# gender_rec2: same information with "Other/Unknown" left as missing
table(ssdata$gender_rec)
ssdata$gender_rec2 <- NA
gender_codes <- c(Female = 0, Male = 1)
for (gender_label in names(gender_codes)) {
  ssdata$gender_rec2[ssdata$gender_rec == gender_label] <- gender_codes[[gender_label]]
}
table(ssdata$gender_rec2)

# turn the 0/1 codes back into a two-level labelled factor
ssdata$gender_rec2 <- factor(
  ssdata$gender_rec2,
  levels = c(0, 1),
  labels = c("Female", "Male")
)
table(ssdata$gender_rec2)

str(ssdata$gender_rec2)
```

## Years at Institution - Checking and Cleaning
```{r, echo = F}
# Distribution of first registered academic year, overall and by section
table(ssdata$firstregacadyr)
# total count across entry years, computed from the data instead of typed in
# by hand so it cannot drift out of sync with the table above
sum(table(ssdata$firstregacadyr))

table(ssdata$firstregacadyr, ssdata$section)


# column percentages: share of each entry year within each section
yeartable <- with(ssdata, table(firstregacadyr, section))
prop.table(yeartable, margin = 2) * 100

# recoding years at institution to numeric scale (2015-16 = 1 ... 2011-12 = 5);
# any other value of firstregacadyr is left as NA
years_lookup <- c(
  "2015-16" = 1,
  "2014-15" = 2,
  "2013-14" = 3,
  "2012-13" = 4,
  "2011-12" = 5
)
ssdata$yearsenrolled <- unname(years_lookup[as.character(ssdata$firstregacadyr)])

table(ssdata$yearsenrolled, ssdata$section)

# anova: does mean years enrolled differ between sections?
describeBy(ssdata$yearsenrolled, ssdata$section)
yearsfit <- aov(yearsenrolled ~ section, data = ssdata)
drop1(yearsfit, ~., test = "F")
TukeyHSD(yearsfit)
confint(yearsfit)

# Boxplot of grades by years enrolled; the diamond (shape 5) marks the group mean.
# The former `na.omit = TRUE` argument was dropped: ggplot() has no such argument.
ggplot(ssdata, aes(x = yearsenrolled, y = grade_rec, group = yearsenrolled)) +
  geom_boxplot() +
  stat_summary(fun.y = mean, geom = "point", shape = 5, size = 4) +
  labs(x = "Years Enrolled", y = "Final Grade", title = "Final Grade by Years Enrolled") +
  scale_y_continuous(limits = c(0, 13), breaks = 1:13) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

```


## Splitting Study Strategies Question
```{r}
## Using `cSplit` in the `splitstackshape` package to split the comma-separated
## answers in `pre_studystrat` / `post_studystrat` into one column per selection

# The output checks below tabulate whichever columns the split actually added.
# The earlier hard-coded list mixed three naming patterns (`_01`, `_1` and
# `post_studystrat10` without an underscore), so it could silently miss columns.

# pre-survey
cols_before <- names(ssdata)
ssdata <- cSplit(ssdata, "pre_studystrat", ",")

# checking output
for (strat_col in setdiff(names(ssdata), cols_before)) {
  print(table(ssdata[[strat_col]], dnn = strat_col))
}

# post-survey
cols_before <- names(ssdata)
ssdata <- cSplit(ssdata, "post_studystrat", ",")

# checking output
for (strat_col in setdiff(names(ssdata), cols_before)) {
  print(table(ssdata[[strat_col]], dnn = strat_col))
}
```


## Coding the different study strategies into their own variables
## Top 3 strategies only
## 1 = self-test
```{r}

# pre- self-test
# names of the three pre-survey columns that are scanned for each strategy code
prestudycol <- paste0("pre_studystrat_0", 1:3)

str(prestudycol)
str(ssdata$pre_studystrat_01)

# pre_ss_selftest is 1 when any of the prestudycol columns holds the code 1 and
# 0 otherwise (missing entries are skipped when counting matches)
ssdata$pre_ss_selftest <- 1 * (
  rowSums(ssdata[, prestudycol, with = FALSE] == 1, na.rm = TRUE) > 0
)

table(ssdata$pre_ss_selftest)
# frequencies by pre-survey section status
table(ssdata$pre_ss_selftest, ssdata$pre_survsection_status)



# post- self-test
# names of the three post-survey columns that are scanned for each strategy code
poststudycol <- paste0("post_studystrat_", 1:3)

ssdata$post_ss_selftest <- 1 * (
  rowSums(ssdata[, poststudycol, with = FALSE] == 1, na.rm = TRUE) > 0
)

table(ssdata$post_ss_selftest)
# frequencies by section
table(ssdata$post_ss_selftest, ssdata$section)



```


## 2 = Use flashcards
```{r}

# pre- flashcards
# pre_ss_flashc is 1 when any prestudycol column holds the code 2, else 0
ssdata$pre_ss_flashc <- 1 * (
  rowSums(ssdata[, prestudycol, with = FALSE] == 2, na.rm = TRUE) > 0
)

# frequencies by pre-survey section status
table(ssdata$pre_ss_flashc, ssdata$pre_survsection_status)


# post- flashcards
# post_ss_flashc is 1 when any poststudycol column holds the code 2, else 0
ssdata$post_ss_flashc <- 1 * (
  rowSums(ssdata[, poststudycol, with = FALSE] == 2, na.rm = TRUE) > 0
)

# frequencies by section
table(ssdata$post_ss_flashc, ssdata$section)
```



## 3 = Reread chapters, articles, notes, etc.
```{r}
# pre- reread
# pre_ss_reread is 1 when any prestudycol column holds the code 3, else 0
ssdata$pre_ss_reread <- 1 * (
  rowSums(ssdata[, prestudycol, with = FALSE] == 3, na.rm = TRUE) > 0
)

# frequencies by pre-survey section status
table(ssdata$pre_ss_reread, ssdata$pre_survsection_status)


# post- reread
# post_ss_reread is 1 when any poststudycol column holds the code 3, else 0
ssdata$post_ss_reread <- 1 * (
  rowSums(ssdata[, poststudycol, with = FALSE] == 3, na.rm = TRUE) > 0
)

# frequencies by section
table(ssdata$post_ss_reread, ssdata$section)
```


## 4 = Underlining or highlighting (high)
```{r}
# pre- high
# pre_ss_high is 1 when any prestudycol column holds the code 4, else 0
ssdata$pre_ss_high <- 1 * (
  rowSums(ssdata[, prestudycol, with = FALSE] == 4, na.rm = TRUE) > 0
)

# frequencies by pre-survey section status
table(ssdata$pre_ss_high, ssdata$pre_survsection_status)


# post- high
# post_ss_high is 1 when any poststudycol column holds the code 4, else 0
ssdata$post_ss_high <- 1 * (
  rowSums(ssdata[, poststudycol, with = FALSE] == 4, na.rm = TRUE) > 0
)

# frequencies by section
table(ssdata$post_ss_high, ssdata$section)
```


## 5 = Recopy notes
```{r}

# pre- recopy: 1 when any prestudycol column holds the code 5, else 0
ssdata$pre_ss_recopy <- 1 * (
  rowSums(ssdata[, prestudycol, with = FALSE] == 5, na.rm = TRUE) > 0
)

table(ssdata$pre_ss_recopy, ssdata$pre_survsection_status)


# post- recopy: 1 when any poststudycol column holds the code 5, else 0
ssdata$post_ss_recopy <- 1 * (
  rowSums(ssdata[, poststudycol, with = FALSE] == 5, na.rm = TRUE) > 0
)

table(ssdata$post_ss_recopy, ssdata$section)
```


## 6 = Condensing or summarizing notes
```{r}

# pre- condense: 1 when any prestudycol column holds the code 6, else 0
ssdata$pre_ss_condense <- 1 * (
  rowSums(ssdata[, prestudycol, with = FALSE] == 6, na.rm = TRUE) > 0
)

table(ssdata$pre_ss_condense, ssdata$pre_survsection_status)


# post- condense: 1 when any poststudycol column holds the code 6, else 0
ssdata$post_ss_condense <- 1 * (
  rowSums(ssdata[, poststudycol, with = FALSE] == 6, na.rm = TRUE) > 0
)

table(ssdata$post_ss_condense, ssdata$section)
```

## 7 = Recopy notes from memory

```{r}

# pre- recopymem: 1 when any prestudycol column holds the code 7, else 0
ssdata$pre_ss_recopymem <- 1 * (
  rowSums(ssdata[, prestudycol, with = FALSE] == 7, na.rm = TRUE) > 0
)

table(ssdata$pre_ss_recopymem, ssdata$pre_survsection_status)


# post- recopymem: 1 when any poststudycol column holds the code 7, else 0
ssdata$post_ss_recopymem <- 1 * (
  rowSums(ssdata[, poststudycol, with = FALSE] == 7, na.rm = TRUE) > 0
)

table(ssdata$post_ss_recopymem, ssdata$section)
```


## 8 = Make diagrams

```{r}

# pre- diag: 1 when any prestudycol column holds the code 8, else 0
ssdata$pre_ss_diag <- 1 * (
  rowSums(ssdata[, prestudycol, with = FALSE] == 8, na.rm = TRUE) > 0
)

table(ssdata$pre_ss_diag, ssdata$pre_survsection_status)


# post- diag: 1 when any poststudycol column holds the code 8, else 0
ssdata$post_ss_diag <- 1 * (
  rowSums(ssdata[, poststudycol, with = FALSE] == 8, na.rm = TRUE) > 0
)

table(ssdata$post_ss_diag, ssdata$section)
```


## 9 = Study with friends

```{r}
# pre- friends: 1 when any prestudycol column holds the code 9, else 0
ssdata$pre_ss_friends <- 1 * (
  rowSums(ssdata[, prestudycol, with = FALSE] == 9, na.rm = TRUE) > 0
)

table(ssdata$pre_ss_friends, ssdata$pre_survsection_status)


# post- friends: 1 when any poststudycol column holds the code 9, else 0
ssdata$post_ss_friends <- 1 * (
  rowSums(ssdata[, poststudycol, with = FALSE] == 9, na.rm = TRUE) > 0
)

table(ssdata$post_ss_friends, ssdata$section)
```



## 10 = Absorbing info (cramming) night before
```{r}
# pre- cram: 1 when any prestudycol column holds the code 10, else 0
ssdata$pre_ss_cram <- 1 * (
  rowSums(ssdata[, prestudycol, with = FALSE] == 10, na.rm = TRUE) > 0
)

table(ssdata$pre_ss_cram, ssdata$pre_survsection_status)


# post- cram: 1 when any poststudycol column holds the code 10, else 0
ssdata$post_ss_cram <- 1 * (
  rowSums(ssdata[, poststudycol, with = FALSE] == 10, na.rm = TRUE) > 0
)

table(ssdata$post_ss_cram, ssdata$section)
```


## 11 = Watch videos
```{r}
# pre- videos: 1 when any prestudycol column holds the code 11, else 0
ssdata$pre_ss_videos <- 1 * (
  rowSums(ssdata[, prestudycol, with = FALSE] == 11, na.rm = TRUE) > 0
)

table(ssdata$pre_ss_videos, ssdata$pre_survsection_status)


# post- videos: 1 when any poststudycol column holds the code 11, else 0
ssdata$post_ss_videos <- 1 * (
  rowSums(ssdata[, poststudycol, with = FALSE] == 11, na.rm = TRUE) > 0
)

table(ssdata$post_ss_videos, ssdata$section)
```

## 12 = Other

```{r}

# pre- other: 1 when any prestudycol column holds the code 12, else 0
ssdata$pre_ss_other <- 1 * (
  rowSums(ssdata[, prestudycol, with = FALSE] == 12, na.rm = TRUE) > 0
)

table(ssdata$pre_ss_other, ssdata$pre_survsection_status)


# post- other: 1 when any poststudycol column holds the code 12, else 0
ssdata$post_ss_other <- 1 * (
  rowSums(ssdata[, poststudycol, with = FALSE] == 12, na.rm = TRUE) > 0
)

table(ssdata$post_ss_other, ssdata$section)
```


# Step 2 - DATA EXPORTING

## Exporting Y1 Pre-Survey Study Strat and URM status variables to csv file
This is done in order to merge these variables with the same Y2 variables for the following two manuscript result sections: 
1) study behaviors at week 1 (Table 2)
2) Percent of students’ study decisions (Table 3)
```{r}
# Keep only the columns listed below for the Year 1 export file.
# NOTE(review): pre_ss_reread and pre_ss_cram are not in this list - confirm
# that leaving them out of the export is intended.
studystratPre <- subset(
  ssdata,
  select = c(
    roster_randomid, urm, gb_status,
    pre_status, post_status,
    pre_studypat, post_studypat,
    pre_ss_selftest, post_ss_selftest,
    pre_ss_videos,
    pre_ss_condense, pre_ss_diag, pre_ss_high,
    pre_ss_flashc, pre_ss_friends, pre_ss_recopy,
    pre_ss_recopymem, pre_ss_other,
    pre_studydecide_chall, pre_studydecide_soon,
    pre_studydecide_cons, pre_studydecide_easy,
    pre_studydecide_imp, pre_studydecide_interest,
    pre_studydecide_sched, pre_studydecide_other,
    post_studydecide_chall, post_studydecide_soon,
    post_studydecide_cons, post_studydecide_easy,
    post_studydecide_imp, post_studydecide_interest,
    post_studydecide_sched, post_studydecide_other,
    grade_rec
  )
)

# tag every exported row with year = 1
studystratPre$year <- 1
table(studystratPre$year)



# write the subset to disk without a row-name column
write.csv(studystratPre, file = "Year1studystrat.csv", row.names = FALSE)

```




# Step 3 - SAMPLE DESCRIPTIVES
## Demographics and Prior Academic Background
## (Table 1)
```{r}
# Frequency tables (categorical variables) and skim() summaries (numeric
# variables) for the sample; nothing in this chunk modifies ssdata.

# Section
table(ssdata$section)

# Age
skim(ssdata$age)

# Gender
# uses the three-level factor (Female / Male / Other/Unknown), not gender_rec2
table(ssdata$gender_rec)

# Ethnicity
# NOTE(review): ethnicity_rec2 is not created anywhere in the visible code (only
# ethnicity_rec is inspected earlier) - confirm the column exists in the raw file
table(ssdata$ethnicity_rec2)

# URM
table(ssdata$urm)

# First Gen
# recoded version with only the codes 0/1 (other values are NA)
table(ssdata$firstgeneration_rec2)

# Low Income
table(ssdata$lowincomeflag_rec)

# years at UCI
table(ssdata$firstregacadyr)

# SAT total
skim(ssdata$sattotalscore)

# College Cumulative GPA
skim(ssdata$gpacumulative)

# Final Course Grade
skim(ssdata$grade_rec)
        
```


# Step 4 - DATA ANALYSIS AND VISUALIZATIONS
## Impact of the study skills intervention on spacing and self-testing
## Spacing Results
```{r, echo = F}
# obtaining proportions of the strategies students reported using at pre

# spacing vs. massing
table(ssdata$pre_studypat)
prop.table(table(ssdata$pre_studypat))


# comparing numbers to the study strategy - cramming
table(ssdata$pre_studypat, ssdata$pre_ss_cram)


# PRE
# pre_spacing_recode is 1 only when pre_studypat == 1 AND pre_ss_cram == 0;
# the three other observed combinations are coded 0, and rows where
# pre_studypat is missing stay NA

ssdata$pre_spacing_recode <- NA
ssdata$pre_spacing_recode[ssdata$pre_studypat == 0 & ssdata$pre_ss_cram == 1] <- 0 # cram, cram
table(ssdata$pre_spacing_recode)

ssdata$pre_spacing_recode[ssdata$pre_studypat == 1 & ssdata$pre_ss_cram == 1] <- 0 # space, cram
table(ssdata$pre_spacing_recode)

ssdata$pre_spacing_recode[ssdata$pre_studypat == 0 & ssdata$pre_ss_cram == 0] <- 0 # cram, cram not-selected
table(ssdata$pre_spacing_recode)

ssdata$pre_spacing_recode[ssdata$pre_studypat == 1 & ssdata$pre_ss_cram == 0] <- 1 # space, cram not-selected
table(ssdata$pre_spacing_recode) # new variable
table(ssdata$pre_studypat) # comparing to old variable


# proportion table of spacing vs. cramming
prop.table(table(ssdata$pre_spacing_recode)) # new variable

# overall t-test
t.test(ssdata$grade_rec ~ ssdata$pre_spacing_recode)

# chisquare by section (pre); the matching post test runs further down, after
# post_spacing_recode has been created
chisq.test(ssdata$section, ssdata$pre_spacing_recode)


# t-test for section a
ssdata_secA <- subset(ssdata, section == "Intervention Section")
t.test(ssdata_secA$grade_rec ~ ssdata_secA$pre_spacing_recode)

# t-test for section b
ssdata_secB <- subset(ssdata, section == "Control Section 1")
t.test(ssdata_secB$grade_rec ~ ssdata_secB$pre_spacing_recode)

# t-test for section c
ssdata_secC <- subset(ssdata, section == "Control Section 2")
t.test(ssdata_secC$grade_rec ~ ssdata_secC$pre_spacing_recode)



# POST
# same recode for the post-survey items
ssdata$post_spacing_recode <- NA
ssdata$post_spacing_recode[ssdata$post_studypat == 0 & ssdata$post_ss_cram == 1] <- 0 # cram, cram
table(ssdata$post_spacing_recode)

ssdata$post_spacing_recode[ssdata$post_studypat == 1 & ssdata$post_ss_cram == 1] <- 0 # space, cram
table(ssdata$post_spacing_recode)

ssdata$post_spacing_recode[ssdata$post_studypat == 0 & ssdata$post_ss_cram == 0] <- 0 # cram, cram not-selected
table(ssdata$post_spacing_recode)

ssdata$post_spacing_recode[ssdata$post_studypat == 1 & ssdata$post_ss_cram == 0] <- 1 # space, cram not-selected
table(ssdata$post_spacing_recode) # new variable
table(ssdata$post_studypat) # comparing to old variable


# proportion table of spacing vs. cramming
prop.table(table(ssdata$post_spacing_recode)) # new variable

# overall t-test
t.test(ssdata$grade_rec ~ ssdata$post_spacing_recode)

# chisquare by section (post)
chisq.test(ssdata$section, ssdata$post_spacing_recode)


# The section subsets are rebuilt here because the ones created above were
# taken before post_spacing_recode was added to ssdata

# t-test for section a
ssdata_secA <- subset(ssdata, section == "Intervention Section")
t.test(ssdata_secA$grade_rec ~ ssdata_secA$post_spacing_recode)

# t-test for section b
ssdata_secB <- subset(ssdata, section == "Control Section 1")
t.test(ssdata_secB$grade_rec ~ ssdata_secB$post_spacing_recode)

# t-test for section c
ssdata_secC <- subset(ssdata, section == "Control Section 2")
t.test(ssdata_secC$grade_rec ~ ssdata_secC$post_spacing_recode)


```



## Descriptives of study strategies and final course grades
## Pre (Not included in the manuscript, but included here for public reference)
```{r, echo = F}
# Pre-survey strategies: share of students selecting each one, followed by a
# t-test of final grade between students who did and did not select it

# spacing vs. cramming
table(ssdata$pre_spacing_recode)
prop.table(table(ssdata$pre_spacing_recode))
t.test(ssdata$grade_rec ~ ssdata$pre_spacing_recode)


# self-test
table(ssdata$pre_ss_selftest)
prop.table(table(ssdata$pre_ss_selftest))
t.test(ssdata$grade_rec ~ ssdata$pre_ss_selftest)

# flashcards
prop.table(table(ssdata$pre_ss_flashc))
t.test(ssdata$grade_rec ~ ssdata$pre_ss_flashc)

# recopy notes from memory
prop.table(table(ssdata$pre_ss_recopymem))
t.test(ssdata$grade_rec ~ ssdata$pre_ss_recopymem)

# re-read
prop.table(table(ssdata$pre_ss_reread))
t.test(ssdata$grade_rec ~ ssdata$pre_ss_reread)

# underlining or highlighting
prop.table(table(ssdata$pre_ss_high))
t.test(ssdata$grade_rec ~ ssdata$pre_ss_high)

# recopying notes
prop.table(table(ssdata$pre_ss_recopy))
t.test(ssdata$grade_rec ~ ssdata$pre_ss_recopy)

# condensing or summarizing notes
prop.table(table(ssdata$pre_ss_condense))
t.test(ssdata$grade_rec ~ ssdata$pre_ss_condense)

# make diagrams
prop.table(table(ssdata$pre_ss_diag))
t.test(ssdata$grade_rec ~ ssdata$pre_ss_diag)

# study with friends
prop.table(table(ssdata$pre_ss_friends))
t.test(ssdata$grade_rec ~ ssdata$pre_ss_friends)

# watch videos
prop.table(table(ssdata$pre_ss_videos))
t.test(ssdata$grade_rec ~ ssdata$pre_ss_videos)

# other
prop.table(table(ssdata$pre_ss_other))
t.test(ssdata$grade_rec ~ ssdata$pre_ss_other)


```


## Descriptives of study strategies and final course grades
## Post 
## (Table 5)
```{r, echo = F}
# Post-survey strategies: share of students selecting each one, grade
# descriptives by group, and a t-test of final grade between the two groups


# spacing vs. cramming
table(ssdata$post_spacing_recode)
prop.table(table(ssdata$post_spacing_recode))
describeBy(ssdata$grade_rec, ssdata$post_spacing_recode, mat = TRUE)
ssdata %>%
  group_by(post_spacing_recode) %>%
  summarize(mean = mean(grade_rec), sd = sd(grade_rec))
# default t-test first, then the equal-variance version
t.test(ssdata$grade_rec ~ ssdata$post_spacing_recode)
t.test(ssdata$grade_rec ~ ssdata$post_spacing_recode, var.equal = TRUE)



# self-test
prop.table(table(ssdata$post_ss_selftest))
describeBy(ssdata$grade_rec, ssdata$post_ss_selftest, mat = TRUE)
t.test(ssdata$grade_rec ~ ssdata$post_ss_selftest)

# flashcards
prop.table(table(ssdata$post_ss_flashc))
describeBy(ssdata$grade_rec, ssdata$post_ss_flashc, mat = TRUE)
t.test(ssdata$grade_rec ~ ssdata$post_ss_flashc)

# recopy notes from memory
prop.table(table(ssdata$post_ss_recopymem))
describeBy(ssdata$grade_rec, ssdata$post_ss_recopymem, mat = TRUE)
t.test(ssdata$grade_rec ~ ssdata$post_ss_recopymem)

# re-read
prop.table(table(ssdata$post_ss_reread))
describeBy(ssdata$grade_rec, ssdata$post_ss_reread, mat = TRUE)
t.test(ssdata$grade_rec ~ ssdata$post_ss_reread)

# underlining or highlighting
prop.table(table(ssdata$post_ss_high))
describeBy(ssdata$grade_rec, ssdata$post_ss_high, mat = TRUE)
t.test(ssdata$grade_rec ~ ssdata$post_ss_high)


# recopying notes
prop.table(table(ssdata$post_ss_recopy))
describeBy(ssdata$grade_rec, ssdata$post_ss_recopy, mat = TRUE)
t.test(ssdata$grade_rec ~ ssdata$post_ss_recopy)


# condensing or summarizing notes
prop.table(table(ssdata$post_ss_condense))
describeBy(ssdata$grade_rec, ssdata$post_ss_condense, mat = TRUE)
t.test(ssdata$grade_rec ~ ssdata$post_ss_condense)


# make diagrams
prop.table(table(ssdata$post_ss_diag))
describeBy(ssdata$grade_rec, ssdata$post_ss_diag, mat = TRUE)
t.test(ssdata$grade_rec ~ ssdata$post_ss_diag)


# study with friends
prop.table(table(ssdata$post_ss_friends))
describeBy(ssdata$grade_rec, ssdata$post_ss_friends, mat = TRUE)
t.test(ssdata$grade_rec ~ ssdata$post_ss_friends)



# watch videos
prop.table(table(ssdata$post_ss_videos))
describeBy(ssdata$grade_rec, ssdata$post_ss_videos, mat = TRUE)
t.test(ssdata$grade_rec ~ ssdata$post_ss_videos)

# other
prop.table(table(ssdata$post_ss_other))
t.test(ssdata$grade_rec ~ ssdata$post_ss_other)

```



# Pre- and Post-Comparisons for Spacing
```{r}

# PRE descriptive information about proportion of students selecting 'spaced' by section
table(ssdata$pre_spacing_recode)
sum(table(ssdata$pre_spacing_recode)) # number of students with a non-missing pre code
table(ssdata$pre_spacing_recode, ssdata$section)

# recoding section as numeric values (Intervention = 0, Control 1 = 1, Control 2 = 2)
ssdata$section_numeric <- NA
ssdata$section_numeric[ssdata$section == "Intervention Section"] <- 0
ssdata$section_numeric[ssdata$section == "Control Section 1"] <- 1
ssdata$section_numeric[ssdata$section == "Control Section 2"] <- 2

# checking recode results
table(ssdata$section)
table(ssdata$section_numeric)

# section x pre-spacing cross-tab
tb1 <- table(ssdata$section_numeric, ssdata$pre_spacing_recode)
tb1
sum(tb1)
chisq.test(tb1)



# chisquare test for section
chisq.test(ssdata$section, ssdata$pre_spacing_recode)
chisq.test(ssdata$section, ssdata$post_spacing_recode)


# post-hoc

# Intervention vs Control 1
posthoc_secAvB <- subset(ssdata, section != "Control Section 2")
table(posthoc_secAvB$section)
sum(table(posthoc_secAvB$section))
chisq.test(posthoc_secAvB$section, posthoc_secAvB$post_spacing_recode)

# Intervention vs Control 2
posthoc_secAvC <- subset(ssdata, section != "Control Section 1")
table(posthoc_secAvC$section)
sum(table(posthoc_secAvC$section))
chisq.test(posthoc_secAvC$section, posthoc_secAvC$post_spacing_recode)


# chisquare test method 2

table(ssdata$post_spacing_recode)
table(ssdata$section_numeric, ssdata$post_spacing_recode)

# section x post-spacing cross-tab
tb2 <- table(ssdata$section_numeric, ssdata$post_spacing_recode)
tb2
sum(tb2)
chisq.test(tb2)

# percent change
# Percent of each section coded 1 ("spaced") at pre and at post, and the
# post-minus-pre difference. These are computed from tb1 / tb2 because the
# hand-typed counts used here before (e.g. 79 of 132 at pre for the first
# section) did not match the tb1 counts typed a few lines above (85 of 132).
pre_spaced_pct <- prop.table(tb1, margin = 1)[, "1"] * 100
post_spaced_pct <- prop.table(tb2, margin = 1)[, "1"] * 100
round(
  cbind(
    pre = pre_spaced_pct,
    post = post_spaced_pct,
    change = post_spaced_pct - pre_spaced_pct
  ),
  2
)

```



# Self-Test Percentage Change
```{r}
# Self-testing by section at pre-survey: cross-tab and chi-square test
tb3 <- table(ssdata$section_numeric, ssdata$pre_ss_selftest)
tb3
chisq.test(tb3)
# same comparison run directly on the two vectors
chisq.test(ssdata$pre_ss_selftest, ssdata$section)

# Self-testing by section at post-survey: cross-tab and chi-square test
tb4 <- table(ssdata$section_numeric, ssdata$post_ss_selftest)
tb4
chisq.test(tb4)

# Sec A change

```



## Study Decisions (Post on Grade)
```{r}

# Pre-post change in spacing, plotted as one line per section

# keep the two spacing codes plus section, then keep only rows where both
# codes are present (a comparison with NA drops the row)
sptab <- subset(ssdata, select = c(pre_spacing_recode, post_spacing_recode, section))
sptab <- subset(sptab, pre_spacing_recode >= 0 & post_spacing_recode >= 0)


# long format (reshape2): one row per student x time point
spmelted <- melt(sptab, id.vars = "section")


# relabel the two time points as "Pre" and "Post"
spmelted$variable <- factor(spmelted$variable, labels = c("Pre", "Post"))


# mean of the 0/1 code for each time point x section
library(Rmisc)
spmelted <- summarySE(
  spmelted,
  measurevar = "value",
  groupvars = c("variable", "section")
)




# Line graph of pre-post changes in spacing by section
spaceplot <- ggplot(
  spmelted,
  aes(x = variable, y = value, group = section, colour = section)
) +
  geom_line(size = 1.5) +
  geom_point(size = 3, shape = 21, colour = "black", fill = "white") +
  coord_fixed(ratio = 3.5) +
  labs(x = "Self-Reported Spacing", y = "Proportion of Students Selecting Spacing") +
  scale_y_continuous(limits = c(0, 1), breaks = c(0, .2, .4, .6, .8, 1)) +
  theme(
    axis.text.x = element_text(size = 12),
    axis.title.x = element_text(size = 15)
  ) +
  scale_colour_grey() +
  labs(colour = "Section")

spaceplot
```




```{r}
# PRE: self-testing by section
table(ssdata$pre_ss_selftest, ssdata$section)

tb3 <- table(ssdata$section_numeric, ssdata$pre_ss_selftest)
tb3
36 + 96 + 56 + 147 + 49 + 160

chisq.test(tb3)


# Pairwise comparisons on the post-survey self-test indicator
# Intervention vs Control 1
table(posthoc_secAvB$section)
132 + 203
chisq.test(posthoc_secAvB$section, posthoc_secAvB$post_ss_selftest)

# Intervention vs Control 2
table(posthoc_secAvC$section)
132 + 209
chisq.test(posthoc_secAvC$section, posthoc_secAvC$post_ss_selftest)





# POST: self-testing overall and by section

table(ssdata$post_ss_selftest)
table(ssdata$post_ss_selftest, ssdata$section)

tb4 <- table(ssdata$section_numeric, ssdata$post_ss_selftest)
tb4
33 + 99 + 99 + 104 + 87 + 122

chisq.test(tb4)
```



```{r, echo = F}
# Pre-post change in self-testing, plotted as one line per section

# keep the two self-test indicators plus section, then keep rows where both
# indicators are present
sttab <- subset(ssdata, select = c(pre_ss_selftest, post_ss_selftest, section))
sttab <- subset(sttab, pre_ss_selftest >= 0 & post_ss_selftest >= 0)


# long format (reshape2): one row per student x time point
stmelted <- melt(sttab, id.vars = "section")


# relabel the two time points as "Pre" and "Post"
stmelted$variable <- factor(stmelted$variable, labels = c("Pre", "Post"))


# mean of the 0/1 indicator for each time point x section
stmelted <- summarySE(
  stmelted,
  measurevar = "value",
  groupvars = c("variable", "section")
)




# Line graph of pre-post changes in self-testing by section
selftestplot <- ggplot(
  stmelted,
  aes(x = variable, y = value, group = section, colour = section)
) +
  geom_line(size = 1.5) +
  geom_point(size = 3, shape = 21, colour = "black", fill = "white") +
  coord_fixed(ratio = 3.5) +
  labs(x = "Self-Reported Self-Testing", y = "Proportion of Students Selecting Self-Testing") +
  scale_y_continuous(limits = c(0, 1), breaks = c(0, .2, .4, .6, .8, 1)) +
  theme(
    axis.text.x = element_text(size = 12),
    axis.title.x = element_text(size = 15)
  ) +
  scale_colour_grey() +
  labs(colour = "Section")


# show the spacing and self-testing plots side by side
grid.arrange(spaceplot, selftestplot, ncol = 2)

# arrange again, this time keeping the result so it can be saved
g <- grid.arrange(spaceplot, selftestplot, ncol = 2)

ggsave(filename = "Y1 spacing selftest.png", plot = g, width = 8, height = 6, units = "in")

```






### Examining non-effective strategies
# Re-read
```{r, echo = F}
# Pre-post change in re-reading, plotted as one line per section

# keep the two re-read indicators plus section, then keep rows where both
# indicators are present
sptab <- subset(ssdata, select = c(pre_ss_reread, post_ss_reread, section))
sptab <- subset(sptab, pre_ss_reread >= 0 & post_ss_reread >= 0)


# long format (reshape2): one row per student x time point
spmelted <- melt(sptab, id.vars = "section")


# relabel the two time points as "Pre" and "Post"
spmelted$variable <- factor(spmelted$variable, labels = c("Pre", "Post"))


# mean of the 0/1 indicator for each time point x section
library(Rmisc)
spmelted <- summarySE(
  spmelted,
  measurevar = "value",
  groupvars = c("variable", "section")
)




# Line graph of pre-post changes in re-reading by section
rereadplot <- ggplot(
  spmelted,
  aes(x = variable, y = value, group = section, colour = section)
) +
  geom_line(size = 1.5) +
  geom_point(size = 3, shape = 21, colour = "black", fill = "white") +
  coord_fixed(ratio = 3.5) +
  labs(x = "Self-Reported Re-Reading", y = "Proportion of Students Selecting Re-Reading") +
  scale_y_continuous(limits = c(0, 1), breaks = c(0, .2, .4, .6, .8, 1)) +
  theme(
    axis.text.x = element_text(size = 12),
    axis.title.x = element_text(size = 15)
  ) +
  scale_colour_grey() +
  labs(colour = "Section")

rereadplot

```


# Condense notes

```{r, echo = F}
# Pre-Post Differences: watching videos and condensing notes

library(Rmisc)

# --- Watch videos ------------------------------------------------------------
# `videoplot` is needed for the side-by-side figure at the end of this chunk but
# was not created anywhere earlier in the file, so it is built here with the
# same steps used for the other strategy plots.
sptab <- subset(ssdata, select = c(pre_ss_videos, post_ss_videos, section))

# removing NAs
sptab <- subset(sptab, pre_ss_videos >= 0 & post_ss_videos >= 0)

# Reshaping data using reshape2 library
spmelted <- melt(sptab, id.vars = c("section"))

# labeling variable to say "pre-" and "post"
spmelted$variable <- factor(spmelted$variable, labels = c("Pre", "Post"))

spmelted <- summarySE(spmelted, measurevar = "value", groupvars = c("variable", "section"))

# Line graph of pre-post changes in watching videos by section
videoplot <- ggplot(spmelted, aes(x = variable, y = value, group = section, colour = section)) +
  geom_line(size = 1.5) +
  geom_point(size = 3, shape = 21, colour = "black", fill = "white") +
  coord_fixed(ratio = 3.5) +
  labs(x = "Self-Reported Video Watching", y = "Proportion of Students Selecting Videos") +
  scale_y_continuous(limits = c(0, 1), breaks = c(0, .2, .4, .6, .8, 1)) +
  theme(axis.text.x = element_text(size = 12), axis.title.x = element_text(size = 15)) +
  scale_colour_grey() +
  labs(colour = "Section")


# --- Condense notes ----------------------------------------------------------
sptab <- subset(ssdata, select = c(pre_ss_condense, post_ss_condense, section))

# removing NAs
sptab <- subset(sptab, pre_ss_condense >= 0 & post_ss_condense >= 0)

# Reshaping data using reshape2 library
spmelted <- melt(sptab, id.vars = c("section"))

# labeling variable to say "pre-" and "post"
spmelted$variable <- factor(spmelted$variable, labels = c("Pre", "Post"))

spmelted <- summarySE(spmelted, measurevar = "value", groupvars = c("variable", "section"))

# Line graph of pre-post changes in condensing notes by section
condenseplot <- ggplot(spmelted, aes(x = variable, y = value, group = section, colour = section)) +
  geom_line(size = 1.5) +
  geom_point(size = 3, shape = 21, colour = "black", fill = "white") +
  coord_fixed(ratio = 3.5) +
  labs(x = "Self-Reported Condensing Notes", y = "Proportion of Students Condensing Notes") +
  scale_y_continuous(limits = c(0, 1), breaks = c(0, .2, .4, .6, .8, 1)) +
  theme(axis.text.x = element_text(size = 12), axis.title.x = element_text(size = 15)) +
  scale_colour_grey() +
  labs(colour = "Section")

condenseplot

# show both plots side by side, then arrange again keeping the result for saving
grid.arrange(videoplot, condenseplot, ncol = 2)

g <- grid.arrange(videoplot, condenseplot, ncol = 2)

ggsave(filename = "Y1 video condense plot.png", plot = g, width = 8, height = 6, units = "in")
```


# Flashcards

```{r, echo = F}
# Pre-post change in flashcard use, plotted as one line per section

# keep the two flashcard indicators plus section, then keep rows where both
# indicators are present
sptab <- subset(ssdata, select = c(pre_ss_flashc, post_ss_flashc, section))
sptab <- subset(sptab, pre_ss_flashc >= 0 & post_ss_flashc >= 0)


# long format (reshape2): one row per student x time point
spmelted <- melt(sptab, id.vars = "section")


# relabel the two time points as "Pre" and "Post"
spmelted$variable <- factor(spmelted$variable, labels = c("Pre", "Post"))


# mean of the 0/1 indicator for each time point x section
library(Rmisc)
spmelted <- summarySE(
  spmelted,
  measurevar = "value",
  groupvars = c("variable", "section")
)




# Line graph of pre-post changes in flashcard use by section
flashcardplot <- ggplot(
  spmelted,
  aes(x = variable, y = value, group = section, colour = section)
) +
  geom_line(size = 1.5) +
  geom_point(size = 3, shape = 21, colour = "black", fill = "white") +
  coord_fixed(ratio = 3.5) +
  labs(x = "Self-Reported Flashcard Use", y = "Proportion of Students Selecting Flashcards") +
  scale_y_continuous(limits = c(0, 1), breaks = c(0, .2, .4, .6, .8, 1)) +
  theme(
    axis.text.x = element_text(size = 12),
    axis.title.x = element_text(size = 15)
  ) +
  scale_colour_grey() +
  labs(colour = "Section")


# show the re-read and flashcard plots side by side
grid.arrange(rereadplot, flashcardplot, ncol = 2)

# arrange again, this time keeping the result so it can be saved
g <- grid.arrange(rereadplot, flashcardplot, ncol = 2)

ggsave(filename = "Y1 reread flashcard.png", plot = g, width = 8, height = 6, units = "in")


```





# SPACING PATTERNS

```{r}
# coding data on 4 profiles (pre code -> post code of the spacing recode)
# 3. Maintained Strategy        (1 -> 1)
# 2. Adopted Strategy           (0 -> 1)
# 1. Stopped Utilizing Strategy (1 -> 0)
# 0. Never Utilized Strategy    (0 -> 0)

# Start from an all-NA column when spacing_cat does not exist yet; the previous
# unconditional as.numeric(ssdata$spacing_cat) produced a zero-length vector in
# that case. An existing column is still converted to numeric as before.
ssdata$spacing_cat <- if ("spacing_cat" %in% names(ssdata)) {
  as.numeric(ssdata$spacing_cat)
} else {
  NA_real_
}
ssdata$spacing_cat[ssdata$pre_spacing_recode == 1 & ssdata$post_spacing_recode == 1] <- 3
ssdata$spacing_cat[ssdata$pre_spacing_recode == 0 & ssdata$post_spacing_recode == 1] <- 2
ssdata$spacing_cat[ssdata$pre_spacing_recode == 1 & ssdata$post_spacing_recode == 0] <- 1
ssdata$spacing_cat[ssdata$pre_spacing_recode == 0 & ssdata$post_spacing_recode == 0] <- 0


# profile counts and section counts, with totals computed from the data
table(ssdata$spacing_cat)
sum(table(ssdata$spacing_cat))
table(ssdata$section_numeric)
sum(table(ssdata$section_numeric))


# Boxplot of final grade by profile; the diamond (shape 5) marks the group mean.
# The former `na.omit = TRUE` entry inside aes() was dropped: it is not an
# aesthetic used by these layers.
ggplot(ssdata, aes(x = as.factor(spacing_cat), y = grade_rec)) +
  geom_boxplot() +
  stat_summary(fun.y = mean, geom = "point", shape = 5, size = 4) +
  labs(x = "Spacing Strategy Adoption", y = "Final Grade", title = "Final Grade by Strat Adoption") +
  scale_y_continuous(limits = c(0, 13), breaks = 1:13)
```



## Spacing Patterns Proportions
## (Table 4)
```{r}

# Spacing profiles by section: counts, within-section percentages, stacked bars

# keep the profile code and section
spacetab <- subset(ssdata, select = c(spacing_cat, section))



# long format (reshape2)
spacemelted <- melt(spacetab, id.vars = "section")


# label the profile codes in increasing order of the code value
spacemelted$spacing_catfac <- factor(
  spacemelted$value,
  labels = c("Never Used", "Decreased", "Increased", "Maintained")
)




# one row per section x profile; summarySE() supplies the group size N
library(Rmisc)
spacemelted <- summarySE(
  spacemelted,
  measurevar = "value",
  groupvars = c("section", "spacing_catfac")
)


# reverse the order of the profile levels
spacemelted$spacing_catfac <- factor(
  spacemelted$spacing_catfac,
  levels = rev(levels(spacemelted$spacing_catfac))
)

# percentage of each profile within its section

spacemelted$n <- spacemelted$N

spacemelted <- spacemelted %>%
  group_by(section, spacing_catfac) %>%
  summarise(n) %>%
  mutate(freq = n / sum(n) * 100)



# Stacked bar chart of profile percentages by section

# Version 1
space1 <- ggplot(spacemelted, aes(x = section, y = freq)) +
  geom_bar(aes(fill = spacing_catfac), stat = "identity") +
  labs(x = "Section", y = "Percent", fill = "", title = "Spacing Patterns") +
  scale_fill_manual(values = c("#669966", "#99CC99", "#FF6666", "#CC0000"))


space1

ggsave("Y1 Space Categories.png", plot = space1, width = 8, height = 4, units = "in")


# print the summary table and the section sizes
spacemelted
table(ssdata$section)

# chi-square test of section by profile, run two ways
chisq.test(ssdata$section_numeric, ssdata$spacing_cat)
summary(table(ssdata$section_numeric, ssdata$spacing_cat))

```




## Spacing bonferroni
### Intervention vs. Control Section 1
```{r}
# Keep section code and spacing profile, dropping section code 2 so that only
# the Intervention section (0) and Control Section 1 (1) remain
sp_i_1 <- subset(
  ssdata,
  section_numeric != 2,
  select = c(section_numeric, spacing_cat)
)
sp_i_1
table(sp_i_1$section_numeric)


# collapse the rows into a section x profile table of counts
sp_i_1 <- table(sp_i_1$section_numeric, sp_i_1$spacing_cat)
sp_i_1

sum(sp_i_1)

# readable row / column labels for the post-hoc output
dimnames(sp_i_1) <- list(
  section_numeric = c("Intervention", "Control Section 1"),
  spacing_cat = c("None", "Dec", "Inc", "Maint")
)
library(fifer)
# Bonferroni-adjusted post-hoc comparisons
chisq.post.hoc(sp_i_1, control = "bonferroni", popsInRows = FALSE)

```



### Intervention vs. Control Section 2
```{r}
# Section and spacing category for every section except section 1
# (leaves the two sections compared in this chunk).
sp_i_2 <- subset(ssdata, section_numeric != 1,
                 select = c(section_numeric, spacing_cat))
sp_i_2
table(sp_i_2$section_numeric)

# Collapse the rows into a section x category frequency table.
sp_i_2 <- table(sp_i_2$section_numeric, sp_i_2$spacing_cat)

# Labels are assigned by position in the sorted table.
# NOTE(review): assumes the intervention section sorts before control
# section 2 and that the category codes sort None < Dec < Inc < Maint --
# confirm against the coding of section_numeric and spacing_cat.
dimnames(sp_i_2) <- list(
  section_numeric = c("Intervention", "Control Section 2"),
  spacing_cat = c("None", "Dec", "Inc", "Maint")
)

# Bonferroni-adjusted pairwise post hoc tests; popsInRows = FALSE tells
# chisq.post.hoc() that the populations are in the columns.
chisq.post.hoc(sp_i_2, control = "bonferroni", popsInRows = FALSE)

```





# SELF-TESTING PATTERNS
## (Table 4)
```{r}


# Self-Test

# Pre/post self-testing pattern codes stored in selftest_cat:
# 3. Maintained Strategy
# 2. Adopted Strategy
# 1. Stopped Utilizing Strategy
# 0. Never Utilized Strategy

# Start from NA (same convention as firstgeneration_rec2) so rows with a
# missing pre or post response stay missing, also when the chunk is re-run.
ssdata$selftest_cat <- NA
ssdata$selftest_cat[ssdata$pre_ss_selftest == 1 & ssdata$post_ss_selftest == 1] <- 3
ssdata$selftest_cat[ssdata$pre_ss_selftest == 0 & ssdata$post_ss_selftest == 1] <- 2
ssdata$selftest_cat[ssdata$pre_ss_selftest == 1 & ssdata$post_ss_selftest == 0] <- 1
ssdata$selftest_cat[ssdata$pre_ss_selftest == 0 & ssdata$post_ss_selftest == 0] <- 0

# Stacked bar chart of self-testing categories by section.
# Keep only the category code and the section label.
selftesttab <- subset(ssdata, select = c(selftest_cat, section))

# Reshaping data using reshape2 library
selftestmelted <- melt(selftesttab, id.vars = c("section"))

# Explicit levels pin each label to its code (0-3 above); without them the
# labels are matched to whichever codes happen to be present, which errors
# or mislabels if a category is empty.
selftestmelted$st_catfac <- factor(selftestmelted$value,
                                   levels = 0:3,
                                   labels = c("Never Used", "Decreased",
                                              "Increased", "Maintained"))

# summarySE() supplies N, the row count per section x category.
library(Rmisc)
selftestmelted <- summarySE(selftestmelted, measurevar = "value",
                            groupvars = c("section", "st_catfac"))

# reversing category order
selftestmelted$st_catfac <- factor(selftestmelted$st_catfac,
                                   levels = rev(levels(selftestmelted$st_catfac)))

# obtaining proportions for category by section
# (after summarise() the data are grouped by section only, so sum(n) is the
# section total)
selftestmelted$n <- selftestmelted$N

selftestmelted <- selftestmelted %>%
  group_by(section, st_catfac) %>%
  summarise(n) %>%
  mutate(freq = n / sum(n) * 100)

# Version 1: one stacked bar per section, filled by self-testing category.
st1 <- ggplot(selftestmelted, aes(x = section, y = freq)) +
  geom_bar(aes(fill = st_catfac), stat = "identity") +
  labs(x = "Section", y = "Percent", fill = "",
       title = "Self-Testing Patterns") +
  scale_fill_manual(values = c("#669966", "#99CC99", "#FF6666", "#CC0000"))

library(gridExtra)
# `st2` is not created in this chunk. If no earlier chunk defines it, the
# two-panel call stops with "object 'st2' not found", so fall back to
# showing and saving st1 on its own (as the spacing chart does).
if (exists("st2")) {
  g <- grid.arrange(st1, st2, ncol = 2)
} else {
  print(st1)
  g <- st1
}
ggsave("Y1 Self-Test Categories.png", width = 8, height = 4, units = "in", g)

selftestmelted

# Spacing and Self-Test Proportion Chart
# (space1 comes from the spacing-patterns chunk)
g <- grid.arrange(space1, st1, ncol = 2)
ggsave("Y1 Space & Self-Test Categories.png", width = 8, height = 4, units = "in", g)

```


## Self-testing bonferroni
### Intervention vs. Control Section 1
```{r}

# Section and self-testing category for every section except section 2
# (leaves the two sections compared in this chunk).
st_i_1 <- subset(ssdata, section_numeric != 2,
                 select = c(section_numeric, selftest_cat))

st_i_1

table(st_i_1$section_numeric)

# Collapse the rows into a section x category frequency table.
st_i_1 <- table(st_i_1$section_numeric, st_i_1$selftest_cat)

st_i_1
sum(st_i_1)

# Labels are assigned by position in the sorted table.
# NOTE(review): assumes the intervention section sorts before control
# section 1 -- confirm against the coding of section_numeric.
dimnames(st_i_1) <- list(
  section_numeric = c("Intervention", "Control Section 1"),
  selftest_cat = c("None", "Dec", "Inc", "Maint")
)

# Bonferroni-adjusted pairwise post hoc tests; popsInRows = FALSE tells
# chisq.post.hoc() that the populations are in the columns.
chisq.post.hoc(st_i_1, control = "bonferroni", popsInRows = FALSE)

```

## Self-testing bonferroni
### Intervention vs. Control Section 2
```{r}
# Section and self-testing category for every section except section 1
# (leaves the two sections compared in this chunk).
st_i_2 <- subset(ssdata, section_numeric != 1,
                 select = c(section_numeric, selftest_cat))
st_i_2
table(st_i_2$section_numeric)

# Collapse the rows into a section x category frequency table.
st_i_2 <- table(st_i_2$section_numeric, st_i_2$selftest_cat)
st_i_2

# Labels are assigned by position in the sorted table.
# NOTE(review): assumes the intervention section sorts before control
# section 2 -- confirm against the coding of section_numeric.
dimnames(st_i_2) <- list(
  section_numeric = c("Intervention", "Control Section 2"),
  selftest_cat = c("None", "Dec", "Inc", "Maint")
)

# Bonferroni-adjusted pairwise post hoc tests; popsInRows = FALSE tells
# chisq.post.hoc() that the populations are in the columns.
chisq.post.hoc(st_i_2, control = "bonferroni", popsInRows = FALSE)

```





## Regression Model
## (Table 6)
Model with OIR data (age, gender, years, urm, sat)
```{r}
# with urm

# Standardize the SAT total score. scale() returns a one-column matrix that
# carries "scaled:center"/"scaled:scale" attributes; as.vector() stores a
# plain numeric column in the data frame instead of a matrix column.
ssdata$sat_zscore <- as.vector(
  scale(ssdata$sattotalscore, center = TRUE, scale = TRUE)
)

describe(ssdata$sat_zscore)

# Treat the strategy-pattern codes and the section code as categorical
# predictors.
ssdata$spacing_cat.f <- factor(ssdata$spacing_cat)
ssdata$selftest_cat.f <- factor(ssdata$selftest_cat)
ssdata$section_numeric.f <- factor(ssdata$section_numeric)

# Linear model of grade_rec on strategy patterns, two post-survey strategy
# indicators, section, and the background covariates.
results <- lm(grade_rec ~ spacing_cat.f + selftest_cat.f + post_ss_condense +
                post_ss_flashc + section_numeric.f + yearsenrolled +
                gender_rec2 + urm + sat_zscore,
              data = ssdata)

results
summary(results)
confint(results, level = .95)

```




# CLICKSTREAM DATA
## Day 13-17

### Average clicks per day by Spacing vs. Cramming at Week 10
```{r, echo = F}
# POST study patterns (using the post_spacing_recode variable)
# Mean daily clicks (day13-day17) for the Intervention Section, split by
# post-survey spacing status.

cs_tab1 <- subset(ssdata, section == "Intervention Section")

cs_tab1 <- subset(cs_tab1,
                  select = c(post_spacing_recode,
                             day13, day14, day15, day16, day17))

# NOTE(review): rows with a missing post_spacing_recode are not removed
# here; if any exist they may form their own NA group in the summary and
# the plot -- confirm whether they should be dropped.

# making spaced a factor
# NOTE(review): labels follow the sorted codes; presumably the lower code is
# "crammed" and the higher one "spaced" -- confirm.
cs_tab1$post_spacing_recode <- factor(cs_tab1$post_spacing_recode,
                                      labels = c("Crammed", "Spaced"))
table(cs_tab1$post_spacing_recode)

# reshaping data using reshape2 library (one row per student x day)
cstab_melted1 <- melt(cs_tab1, id.vars = c("post_spacing_recode"))

# summarizing data using summarySE (mean and se per day x group)
library(Rmisc)
cstab_melted1 <- summarySE(cstab_melted1, measurevar = "value",
                           groupvars = c("variable", "post_spacing_recode"))

# The same dodge is applied to lines, points and error bars so each bar stays
# centred on its point (previously only the error bars were dodged).
pd <- position_dodge(0.1)

# A single coord_fixed() and a single legend.position are kept: the earlier
# coord_fixed(ratio = .5) and legend.position = "none" were overridden by the
# later settings anyway.
csplot1 <- ggplot(cstab_melted1,
                  aes(x = variable, y = value,
                      group = post_spacing_recode,
                      colour = post_spacing_recode)) +
  geom_line(size = 1, position = pd) +
  geom_point(size = 2.5, shape = 1, fill = "white", position = pd) +
  geom_errorbar(aes(ymin = value - se, ymax = value + se),
                width = .1, position = pd) +
  labs(x = "Week of Midterm 1", y = "Average Number of Clicks", colour = "") +
  theme(axis.text.x = element_text(size = 8, angle = 90, hjust = 1),
        axis.title.x = element_text(size = 15),
        legend.position = "bottom") +
  scale_y_continuous(limits = c(0, 60),
                     breaks = c(0, 10, 20, 30, 40, 50, 60)) +
  coord_fixed(ratio = .15) +
  scale_color_manual(values = c("#FF0033", "#33CCCC"))

csplot1


```

## Day 29-33

### Average clicks per day by Spacing vs. Cramming at Week 10
```{r, echo = F}
# POST study patterns (using the post_spacing_recode variable)
# Mean daily clicks (day29-day33) for the Intervention Section, split by
# post-survey spacing status.

cs_tab2 <- subset(ssdata, section == "Intervention Section")

cs_tab2 <- subset(cs_tab2,
                  select = c(post_spacing_recode,
                             day29, day30, day31, day32, day33))

# making spaced a factor
# NOTE(review): labels follow the sorted codes; presumably the lower code is
# "crammed" and the higher one "spaced" -- confirm. Rows with a missing
# post_spacing_recode are not removed and may form their own NA group.
cs_tab2$post_spacing_recode <- factor(cs_tab2$post_spacing_recode,
                                      labels = c("Crammed", "Spaced"))
table(cs_tab2$post_spacing_recode)

# reshaping data using reshape2 library (one row per student x day)
cstab_melted2 <- melt(cs_tab2, id.vars = c("post_spacing_recode"))

# summarizing data using summarySE (mean and se per day x group)
cstab_melted2 <- summarySE(cstab_melted2, measurevar = "value",
                           groupvars = c("variable", "post_spacing_recode"))

# The same dodge is applied to lines, points and error bars so each bar stays
# centred on its point (previously only the error bars were dodged).
pd <- position_dodge(0.1)

# A single coord_fixed() and a single legend.position are kept: the earlier
# coord_fixed(ratio = .5) and legend.position = "none" were overridden by the
# later settings anyway.
csplot2 <- ggplot(cstab_melted2,
                  aes(x = variable, y = value,
                      group = post_spacing_recode,
                      colour = post_spacing_recode)) +
  geom_line(size = 1, position = pd) +
  geom_point(size = 2.5, shape = 1, fill = "white", position = pd) +
  geom_errorbar(aes(ymin = value - se, ymax = value + se),
                width = .1, position = pd) +
  labs(x = "Week of Midterm 2", y = "Average Number of Clicks", colour = "") +
  theme(axis.text.x = element_text(size = 8, angle = 90, hjust = 1),
        axis.title.x = element_text(size = 15),
        legend.position = "bottom") +
  scale_y_continuous(limits = c(0, 60),
                     breaks = c(0, 10, 20, 30, 40, 50, 60)) +
  coord_fixed(ratio = .15) +
  scale_color_manual(values = c("#FF0033", "#33CCCC"))

csplot2

```

## Day 50-54

### Average clicks per day by Spacing vs. Cramming at Week 10
```{r, echo = F}
# POST study patterns (using the post_spacing_recode variable)
# Mean daily clicks (day50-day54) for the Intervention Section, split by
# post-survey spacing status.

cs_tab3 <- subset(ssdata, section == "Intervention Section")

cs_tab3 <- subset(cs_tab3,
                  select = c(post_spacing_recode,
                             day50, day51, day52, day53, day54))

# making spaced a factor
# NOTE(review): labels follow the sorted codes; presumably the lower code is
# "crammed" and the higher one "spaced" -- confirm. Rows with a missing
# post_spacing_recode are not removed and may form their own NA group.
cs_tab3$post_spacing_recode <- factor(cs_tab3$post_spacing_recode,
                                      labels = c("Crammed", "Spaced"))
table(cs_tab3$post_spacing_recode)

# reshaping data using reshape2 library (one row per student x day)
cstab_melted3 <- melt(cs_tab3, id.vars = c("post_spacing_recode"))

# summarizing data using summarySE (mean and se per day x group)
cstab_melted3 <- summarySE(cstab_melted3, measurevar = "value",
                           groupvars = c("variable", "post_spacing_recode"))

# The same dodge is applied to lines, points and error bars so each bar stays
# centred on its point (previously only the error bars were dodged).
pd <- position_dodge(0.1)

# A single coord_fixed() and a single legend.position are kept: the earlier
# coord_fixed(ratio = .5) and legend.position = "none" were overridden by the
# later settings anyway.
csplot3 <- ggplot(cstab_melted3,
                  aes(x = variable, y = value,
                      group = post_spacing_recode,
                      colour = post_spacing_recode)) +
  geom_line(size = 1, position = pd) +
  geom_point(size = 2.5, shape = 1, fill = "white", position = pd) +
  geom_errorbar(aes(ymin = value - se, ymax = value + se),
                width = .1, position = pd) +
  labs(x = "Week of Midterm 3", y = "Average Number of Clicks", colour = "") +
  theme(axis.text.x = element_text(size = 8, angle = 90, hjust = 1),
        axis.title.x = element_text(size = 15),
        legend.position = "bottom") +
  scale_y_continuous(limits = c(0, 60),
                     breaks = c(0, 10, 20, 30, 40, 50, 60)) +
  coord_fixed(ratio = .15) +
  scale_color_manual(values = c("#FF0033", "#33CCCC"))

csplot3

# Draw the three midterm panels side by side once and keep the arranged
# object for saving (csplot1 and csplot2 come from the two preceding chunks).
g <- grid.arrange(csplot1, csplot2, csplot3, ncol = 3)
ggsave("Y1 Clickstream Midterms w errobars.png", width = 8, height = 6, units = "in", g)


```




